Data for this study come from the subset of responses collected on the SAPA-project.org website between February 17, 2017 and July 22, 2019. The initial date is the day that the semi-random presentation of items to participants was changed to increase presentation of SPI-135 items, which are the basis for personality measurement in this study. This period also represents a new period of data collection on SAPA containing data that are not available in the public domain at the time of analysis. The end date of data collection was the first day following preregistration of analysis that the authors were able to analyze data.
set.seed(052319)
# load packages
packages = c("tidyverse", "janitor", "psych", "devtools",
"PAutilities", "measurements", "here", "caret")
lapply(packages, library, character.only = TRUE)
rm(packages)
#read in data
load(here("../../data/SAPA/collaboration/SAPAdata07feb2017thru22jul2019forSara2.rdata"))
sapa = SAPAdata07feb2017thru22jul2019x
source(here("scripts/personality_scales.R"))
keys = read.csv("data/superKey.csv", header = TRUE, row.names = 1)
# super key -- this contains the master key list for all of SAPA. every item ever administered and every scale you can score
# each row is a single item
# each column is a scale
# the value of a cell is 0 if that item is not part of that scale, 1 if that item positively loads on the scale, and -1 if the item negatively loads on the scale
Participants were included in the analysis if they were under the age of 18, from the United States, and had reported their biological sex at birth, height, and weight.
# remove participants who are 18 years or older and from the US
sapa = sapa %>%
filter(age < 18) %>%
filter(country == "USA") %>%
filter(!is.na(sex)) %>%
filter(!is.na(height)) %>%
filter(!is.na(weight))
Education was rescored to represent years of education. All parental SES variables – education, estimated income and estimated prestige, were standardized to the sample and averaged to create a single index of parental SES.
# make sure occupational variables are numeric
sapa = sapa %>%
mutate_at(vars(matches("^(p)\\d(occ)")), as.numeric)
#or years
sapa = sapa %>%
mutate(p1edu = case_when(
p1edu == "less12yrs" ~ "6",
p1edu == "HSgrad" ~ "12",
p1edu == "SomeCollege" ~ "14",
p1edu == "CurrentInUniv" ~ "14",
p1edu == "AssociateDegree" ~ "14",
p1edu == "CollegeDegree" ~ "16",
p1edu == "InGradOrProSchool" ~ "18",
p1edu == "GradOrProDegree" ~ "20"))
sapa = sapa %>%
mutate(p2edu = case_when(
p2edu == "less12yrs" ~ "6",
p2edu == "HSgrad" ~ "12",
p2edu == "SomeCollege" ~ "14",
p2edu == "CurrentInUniv" ~ "14",
p2edu == "AssociateDegree" ~ "14",
p2edu == "CollegeDegree" ~ "16",
p2edu == "InGradOrProSchool" ~ "18",
p2edu == "GradOrProDegree" ~ "20"))
sapa$p1edu = as.numeric(sapa$p1edu)
sapa$p2edu = as.numeric(sapa$p2edu)
sapa = sapa %>%
filter(!is.na(p1edu) | !is.na(p2edu) |
!is.na(p1occIncomeEst) | !is.na(p2occIncomeEst) |
!is.na(p1occPrestige) | !is.na(p2occPrestige))
#estimate SES composite
sapa = sapa %>%
mutate(z.p1edu = scale(p1edu),
z.p2edu = scale(p2edu),
z.p1occIncomeEst = scale(p1occIncomeEst),
z.p2occIncomeEst = scale(p2occIncomeEst),
z.p2occPrestige = scale(p1occPrestige),
z.p2occPrestige = scale(p2occPrestige))
sapa$ses = rowMeans(sapa[,grepl("^z\\.", names(sapa))], na.rm=T)
sapa = sapa %>%
dplyr::select(-starts_with("z"))
Big Five traits were scored using a sum-score method, averaged across non-missing responses.
# select just the rows that correspond to variables in the current SAPA dataset
vars = names(sapa)
keys = keys[rownames(keys) %in% vars, ]
# select just the Big 5 scales that are scored using the SPI_135 form
bfkeys = keys %>%
select(contains("SPI_135")) %>%
select(1:5)
bfkeys = keys2list(as.matrix(bfkeys), sign = T)
# score the items (this contains item and scale statistics too!)
b5scored = scoreItems(keys = bfkeys, items = sapa)
# add scores to SAPA
b5scores = as.data.frame(b5scored$scores[,1:5])
names(b5scores) = gsub("135_27_5_", "", names(b5scores))
sapa = cbind(sapa, b5scores)
The narrower traits, the SPI-27, were scored using IRT scoring. Calibration parameters were taken from a different dataset and are available on request.
load(here("../../data/SAPA/IRTinfoICAR.rdata"))
# IRT score
dataSet <- subset(sapa, select = c(orderForItems))
ICARirtScores <- matrix(nrow=dim(dataSet)[1], ncol=5)
ICARirtSEs <- matrix(nrow=dim(dataSet)[1], ncol=5)
for (i in 1:length(IRToutputICAR)) {
data <- subset(dataSet, select = c(names(IRToutputICAR[[i]]$irt$difficulty[[1]])))
calibrations <- IRToutputICAR[[i]]
scored <- scoreIrt(calibrations, data, keys = NULL, cut = 0)
irt.data <- irt.se(calibrations, score = as.matrix(scored[,1]))
TScoring <- (irt.data[,"scores"]-thetaNormsMeans[i])/thetaNormsSDs[i]
TScores <- TScoring*10+50
ICARirtScores[,i] <- TScores
TScoreSEs <- irt.data[,"se"]*10
ICARirtSEs[,i] <- TScoreSEs
rm(TScores, TScoring, TScoreSEs, scored, calibrations, data)
}
ICARirtScores <- as.data.frame(ICARirtScores)
colnames(ICARirtScores) <- scaleNames
ICARirtSEs <- as.data.frame(ICARirtSEs)
colnames(ICARirtSEs) <- scaleNames
rm(IRToutputICAR, thetaNormsMeans, thetaNormsSDs, scaleNames)
#add to sapa dataset
sapa = cbind(sapa, ICARirtScores)
# remove individual items
sapa = sapa %>%
select(-contains("q_"))
Cognition was also scored using IRT scoring, with calibrations from a separate dataset.
load(here("../../data/SAPA/IRTinfoICAR.rdata"))
# IRT score
dataSet <- subset(sapa, select = c(orderForItems))
ICARirtScores <- matrix(nrow=dim(dataSet)[1], ncol=5)
ICARirtSEs <- matrix(nrow=dim(dataSet)[1], ncol=5)
for (i in 1:length(IRToutputICAR)) {
data <- subset(dataSet, select = c(names(IRToutputICAR[[i]]$irt$difficulty[[1]])))
calibrations <- IRToutputICAR[[i]]
scored <- scoreIrt(calibrations, data, keys = NULL, cut = 0)
irt.data <- irt.se(calibrations, score = as.matrix(scored[,1]))
TScoring <- (irt.data[,"scores"]-thetaNormsMeans[i])/thetaNormsSDs[i]
TScores <- TScoring*10+50
ICARirtScores[,i] <- TScores
TScoreSEs <- irt.data[,"se"]*10
ICARirtSEs[,i] <- TScoreSEs
rm(TScores, TScoring, TScoreSEs, scored, calibrations, data)
}
ICARirtScores <- as.data.frame(ICARirtScores)
colnames(ICARirtScores) <- scaleNames
ICARirtSEs <- as.data.frame(ICARirtSEs)
colnames(ICARirtSEs) <- scaleNames
rm(IRToutputICAR, thetaNormsMeans, thetaNormsSDs, scaleNames)
#add to sapa dataset
sapa = cbind(sapa, ICARirtScores)
# remove individual items
sapa = sapa %>%
select(-contains("q_"))
BMI percentile represents a participant’s percentile score on BMI relative to others of their assigned sex at birth. These were estimated from the PAutilities package, developed by WHO Multicentre Growth Reference Study (MGRS) information about the development of these reference standards can be found at https://www.cdc.gov/obesity/childhood/defining.html. These standards in turn were develoed using the 2000 CDC growth charts, based on data from 5 national health examination surveys that occurred from 1963 to 1994 and supplemental data from surveys that occurred from 1960 to 1995.
Kuczmarski RJ, Ogden CL, Guo SS, et al. 2000 CDC growth charts for the United States: methods and development. National Center for Health Statistics. Vital Health Stat 11. 2002;(246):1-190
BMI category is assigned based on BMI percentile: participants in the bottom 10% are labeled Underweight, between the top 10% and 5% are Overweight, and top 5% are Obese. All others are labelled Normal.
All analyses were perfomed separately by gender.
sapa = sapa %>%
mutate(cog = ICAR60) %>%
select(sex, BMI, BMI_p, BMI_c, p1edu,
p1occPrestige, p1occIncomeEst, p2edu,
p2occPrestige, p2occIncomeEst, ses, cog, contains("SPI"))
sapa_male = sapa %>%
filter(sex == "male") %>%
dplyr::select(-sex)
sapa_female = sapa %>%
filter(sex == "female") %>%
dplyr::select(-sex)
save(b5scored, file = here("data/alpha.Rdata"))
The datasets were split into training (75%) and test (25%) sets; all regression models are estimated using the training sets. The test set was reserved to estimate model accuarcy, comparing models with different sets of individual differences.
# set seed
set.seed(090919)
# parition into training and test sets. objects identify just training rows
train_male = createDataPartition(sapa_male$BMI_c, p = .75, list = FALSE)
train_female = createDataPartition(sapa_female$BMI_c, p = .75, list = FALSE)
Descriptive statistics are estimated using the psych package.
descriptives = describeBy(sapa, group = "sex")
library(kableExtra)
#pull descriptives into a list
descriptives.df = data.frame(gender = names(descriptives))
descriptives.df$data = descriptives
#add variable names and unnest
descriptives.df = descriptives.df %>%
mutate(data = map(data, function(x) mutate(x, vars = rownames(x)))) %>%
unnest(cols = c(data))
descriptives.df %>%
filter(gender == "female") %>%
select(-gender) %>%
kable(., digits = 2) %>%
kable_styling()
| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| sex* | 6530 | 1.00 | 0.00 | 1.00 | 1.00 | 0.00 | 1.00 | 1.00 | 0.00 | 0.00 | ||
| BMI | 6502 | 23.07 | 5.00 | 21.85 | 22.38 | 3.65 | 15.01 | 52.78 | 37.77 | 1.65 | 3.85 | 0.06 |
| BMI_p | 6530 | 62.70 | 27.61 | 67.80 | 64.88 | 30.99 | 0.00 | 99.80 | 99.80 | -0.53 | -0.80 | 0.34 |
| BMI_c* | 6530 | Inf | -Inf | -Inf | ||||||||
| p1edu | 6249 | 15.25 | 3.58 | 16.00 | 15.52 | 2.97 | 6.00 | 20.00 | 14.00 | -0.64 | 0.56 | 0.05 |
| p1occPrestige | 5723 | 60.76 | 14.64 | 67.12 | 62.14 | 13.55 | 24.22 | 79.09 | 54.87 | -0.82 | -0.42 | 0.19 |
| p1occIncomeEst | 5594 | 61625.23 | 21784.89 | 60244.00 | 60512.80 | 20228.59 | 21980.00 | 112490.00 | 90510.00 | 0.39 | -0.24 | 291.27 |
| p2edu | 5876 | 14.62 | 3.66 | 14.00 | 14.87 | 2.97 | 6.00 | 20.00 | 14.00 | -0.53 | 0.36 | 0.05 |
| p2occPrestige | 4818 | 57.87 | 15.76 | 63.85 | 58.75 | 16.23 | 24.22 | 79.09 | 54.87 | -0.47 | -1.04 | 0.23 |
| p2occIncomeEst | 4729 | 59058.07 | 22926.91 | 55790.00 | 57519.07 | 24640.81 | 21980.00 | 112490.00 | 90510.00 | 0.53 | -0.34 | 333.40 |
| ses | 6528 | -0.04 | 0.76 | 0.05 | 0.00 | 0.76 | -2.56 | 2.32 | 4.89 | -0.62 | 0.43 | 0.01 |
| cog | 6507 | 48.06 | 9.19 | 48.02 | 48.00 | 9.73 | 18.46 | 76.53 | 58.07 | 0.03 | -0.35 | 0.11 |
| SPI_Agree | 6530 | 4.26 | 0.67 | 4.36 | 4.31 | 0.53 | 1.00 | 6.00 | 5.00 | -0.81 | 1.35 | 0.01 |
| SPI_Consc | 6530 | 3.87 | 0.67 | 3.86 | 3.87 | 0.64 | 1.00 | 6.00 | 5.00 | -0.05 | 0.36 | 0.01 |
| SPI_Extra | 6530 | 3.61 | 0.83 | 3.64 | 3.62 | 0.85 | 1.00 | 6.00 | 5.00 | -0.09 | -0.02 | 0.01 |
| SPI_Neuro | 6530 | 4.31 | 0.77 | 4.36 | 4.33 | 0.74 | 1.29 | 6.00 | 4.71 | -0.42 | 0.28 | 0.01 |
| SPI_Open | 6530 | 4.48 | 0.55 | 4.50 | 4.49 | 0.53 | 2.07 | 6.00 | 3.93 | -0.24 | 0.31 | 0.01 |
| SPI_Compassion | 6530 | 51.31 | 9.51 | 53.78 | 52.56 | 8.67 | 15.93 | 63.13 | 47.20 | -1.15 | 1.12 | 0.12 |
| SPI_Irritability | 6530 | 50.97 | 9.87 | 51.27 | 51.15 | 11.81 | 27.87 | 72.50 | 44.63 | -0.13 | -0.96 | 0.12 |
| SPI_Sociability | 6530 | 49.82 | 10.02 | 52.22 | 51.19 | 8.27 | 15.89 | 66.44 | 50.54 | -1.36 | 1.94 | 0.12 |
| SPI_WellBeing | 6488 | 49.03 | 9.80 | 48.95 | 49.04 | 11.22 | 26.47 | 74.40 | 47.94 | -0.01 | -0.84 | 0.12 |
| SPI_SensationSeeking | 6488 | 49.55 | 10.03 | 49.00 | 49.26 | 11.45 | 29.19 | 71.62 | 42.43 | 0.21 | -0.88 | 0.12 |
| SPI_Anxiety | 6530 | 51.88 | 8.90 | 54.46 | 53.24 | 7.80 | 19.27 | 62.10 | 42.82 | -1.26 | 1.19 | 0.11 |
| SPI_Honesty | 6530 | 50.65 | 9.56 | 52.54 | 51.80 | 7.38 | 3.87 | 77.00 | 73.13 | -1.70 | 4.87 | 0.12 |
| SPI_Industry | 6530 | 50.29 | 10.01 | 49.64 | 50.18 | 11.27 | 27.91 | 73.23 | 45.31 | 0.10 | -0.80 | 0.12 |
| SPI_Intellect | 6530 | 49.36 | 10.20 | 51.63 | 50.56 | 9.26 | 14.10 | 65.73 | 51.63 | -1.04 | 0.75 | 0.13 |
| SPI_Creativity | 6530 | 49.72 | 10.03 | 51.03 | 50.57 | 10.36 | 18.38 | 64.67 | 46.29 | -0.70 | 0.03 | 0.12 |
| SPI_Impulsivity | 6488 | 49.85 | 10.09 | 49.16 | 49.47 | 11.50 | 31.84 | 72.72 | 40.88 | 0.27 | -0.82 | 0.13 |
| SPI_AttentionSeeking | 6530 | 49.68 | 10.08 | 52.47 | 50.98 | 8.25 | 26.43 | 66.95 | 40.51 | -1.00 | 0.12 | 0.12 |
| SPI_Order | 6529 | 50.18 | 10.06 | 49.75 | 49.98 | 11.63 | 25.20 | 74.44 | 49.24 | 0.15 | -0.87 | 0.12 |
| SPI_Authoritarianism | 6489 | 50.55 | 9.69 | 52.46 | 51.61 | 8.97 | 15.22 | 67.09 | 51.87 | -0.95 | 0.52 | 0.12 |
| SPI_Charisma | 6487 | 49.74 | 10.00 | 50.32 | 50.11 | 10.90 | 22.58 | 71.58 | 49.00 | -0.30 | -0.57 | 0.12 |
| SPI_Trust | 6530 | 49.83 | 10.00 | 50.04 | 49.87 | 10.68 | 27.80 | 73.20 | 45.40 | -0.03 | -0.71 | 0.12 |
| SPI_Humor | 6489 | 50.65 | 9.62 | 52.72 | 51.87 | 8.77 | 7.52 | 64.74 | 57.21 | -1.25 | 1.81 | 0.12 |
| SPI_EmotionalExpressiveness | 6489 | 50.36 | 10.04 | 50.04 | 50.12 | 12.09 | 32.70 | 71.81 | 39.11 | 0.16 | -1.01 | 0.12 |
| SPI_ArtAppreciation | 6488 | 51.35 | 8.94 | 53.85 | 52.92 | 5.09 | 15.08 | 72.72 | 57.65 | -2.11 | 5.33 | 0.11 |
| SPI_Introspection | 6529 | 50.04 | 9.89 | 51.98 | 51.19 | 9.80 | 15.16 | 62.58 | 47.43 | -0.95 | 0.48 | 0.12 |
| SPI_Perfectionism | 6530 | 50.73 | 9.83 | 51.36 | 51.18 | 10.47 | 19.29 | 69.55 | 50.25 | -0.38 | -0.44 | 0.12 |
| SPI_SelfControl | 6485 | 49.31 | 9.86 | 49.50 | 49.30 | 10.53 | 25.93 | 74.34 | 48.40 | 0.01 | -0.56 | 0.12 |
| SPI_Conformity | 6489 | 50.59 | 9.93 | 51.34 | 50.83 | 10.79 | 28.22 | 71.14 | 42.92 | -0.20 | -0.77 | 0.12 |
| SPI_Adaptability | 6488 | 49.44 | 10.12 | 48.75 | 49.39 | 11.32 | 28.77 | 70.29 | 41.52 | 0.06 | -0.88 | 0.13 |
| SPI_EasyGoingness | 6485 | 49.56 | 9.99 | 50.80 | 50.22 | 9.63 | 13.49 | 69.53 | 56.03 | -0.61 | 0.09 | 0.12 |
| SPI_EmotionalStability | 6529 | 48.20 | 10.02 | 50.26 | 48.89 | 9.90 | 28.98 | 68.44 | 39.46 | -0.56 | -0.68 | 0.12 |
| SPI_Conservatism | 6480 | 49.54 | 10.13 | 51.27 | 50.42 | 10.20 | 28.73 | 70.97 | 42.24 | -0.63 | -0.46 | 0.13 |
descriptives.df %>%
filter(gender == "male") %>%
select(-gender) %>%
kable(., digits = 2) %>%
kable_styling()
| vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| sex* | 2952 | 2.00 | 0.00 | 2.00 | 2.00 | 0.00 | 2.00 | 2.00 | 0.00 | 0.00 | ||
| BMI | 2934 | 22.84 | 4.90 | 21.62 | 22.21 | 3.69 | 15.01 | 53.76 | 38.75 | 1.48 | 3.05 | 0.09 |
| BMI_p | 2952 | 60.00 | 30.53 | 64.60 | 62.02 | 37.95 | 0.00 | 99.90 | 99.90 | -0.40 | -1.07 | 0.56 |
| BMI_c* | 2952 | Inf | -Inf | -Inf | ||||||||
| p1edu | 2821 | 15.19 | 3.64 | 16.00 | 15.49 | 2.97 | 6.00 | 20.00 | 14.00 | -0.67 | 0.53 | 0.07 |
| p1occPrestige | 2620 | 60.20 | 15.22 | 67.12 | 61.55 | 13.55 | 24.22 | 79.09 | 54.87 | -0.75 | -0.62 | 0.30 |
| p1occIncomeEst | 2570 | 61491.45 | 22195.84 | 60244.00 | 60663.71 | 22141.15 | 21980.00 | 112490.00 | 90510.00 | 0.29 | -0.44 | 437.83 |
| p2edu | 2660 | 14.71 | 3.58 | 16.00 | 14.95 | 2.97 | 6.00 | 20.00 | 14.00 | -0.60 | 0.55 | 0.07 |
| p2occPrestige | 2147 | 57.07 | 15.59 | 57.98 | 58.02 | 18.55 | 24.22 | 79.09 | 54.87 | -0.50 | -0.98 | 0.34 |
| p2occIncomeEst | 2111 | 57247.11 | 22364.35 | 52210.00 | 55755.02 | 24789.07 | 21980.00 | 112490.00 | 90510.00 | 0.60 | -0.04 | 486.76 |
| ses | 2952 | -0.06 | 0.78 | 0.05 | -0.01 | 0.75 | -2.56 | 2.32 | 4.89 | -0.62 | 0.48 | 0.01 |
| cog | 2948 | 49.65 | 9.60 | 49.71 | 49.68 | 10.12 | 19.86 | 76.53 | 56.68 | -0.06 | -0.37 | 0.18 |
| SPI_Agree | 2952 | 4.11 | 0.70 | 4.21 | 4.17 | 0.53 | 1.00 | 6.00 | 5.00 | -1.00 | 1.88 | 0.01 |
| SPI_Consc | 2952 | 3.77 | 0.63 | 3.79 | 3.77 | 0.53 | 1.43 | 6.00 | 4.57 | -0.01 | 0.64 | 0.01 |
| SPI_Extra | 2952 | 3.62 | 0.81 | 3.64 | 3.63 | 0.74 | 1.00 | 6.00 | 5.00 | -0.20 | 0.24 | 0.01 |
| SPI_Neuro | 2952 | 3.79 | 0.81 | 3.86 | 3.81 | 0.74 | 1.00 | 6.00 | 5.00 | -0.27 | 0.29 | 0.01 |
| SPI_Open | 2952 | 4.55 | 0.53 | 4.57 | 4.56 | 0.53 | 2.21 | 6.00 | 3.79 | -0.27 | 0.50 | 0.01 |
| SPI_Compassion | 2952 | 47.10 | 10.40 | 48.58 | 48.07 | 9.76 | 15.93 | 63.13 | 47.20 | -0.83 | 0.40 | 0.19 |
| SPI_Irritability | 2952 | 47.83 | 9.94 | 47.40 | 47.58 | 11.58 | 27.87 | 72.50 | 44.63 | 0.18 | -0.91 | 0.18 |
| SPI_Sociability | 2952 | 50.46 | 9.87 | 53.03 | 51.88 | 7.51 | 15.89 | 66.44 | 50.54 | -1.51 | 2.53 | 0.18 |
| SPI_WellBeing | 2925 | 52.24 | 10.04 | 53.13 | 52.67 | 11.35 | 26.47 | 74.40 | 47.94 | -0.30 | -0.77 | 0.19 |
| SPI_SensationSeeking | 2921 | 50.98 | 9.86 | 50.67 | 50.85 | 10.93 | 30.74 | 71.42 | 40.68 | 0.11 | -0.88 | 0.18 |
| SPI_Anxiety | 2952 | 45.79 | 10.97 | 47.72 | 46.64 | 11.56 | 17.73 | 62.10 | 44.37 | -0.58 | -0.59 | 0.20 |
| SPI_Honesty | 2952 | 48.62 | 10.74 | 50.53 | 49.85 | 8.06 | 3.87 | 77.00 | 73.13 | -1.49 | 3.79 | 0.20 |
| SPI_Industry | 2952 | 49.39 | 9.93 | 48.86 | 49.13 | 10.98 | 28.29 | 73.92 | 45.64 | 0.20 | -0.73 | 0.18 |
| SPI_Intellect | 2952 | 51.43 | 9.39 | 53.71 | 52.62 | 8.79 | 14.10 | 65.73 | 51.63 | -1.15 | 1.25 | 0.17 |
| SPI_Creativity | 2952 | 50.56 | 9.92 | 52.20 | 51.47 | 10.46 | 18.38 | 64.67 | 46.29 | -0.77 | 0.17 | 0.18 |
| SPI_Impulsivity | 2921 | 50.31 | 9.78 | 49.82 | 49.99 | 10.76 | 31.84 | 72.72 | 40.88 | 0.24 | -0.76 | 0.18 |
| SPI_AttentionSeeking | 2952 | 50.72 | 9.77 | 53.52 | 52.13 | 7.58 | 26.43 | 65.70 | 39.27 | -1.14 | 0.54 | 0.18 |
| SPI_Order | 2951 | 49.64 | 9.87 | 49.08 | 49.39 | 11.12 | 25.20 | 74.44 | 49.24 | 0.18 | -0.77 | 0.18 |
| SPI_Authoritarianism | 2925 | 48.87 | 10.55 | 51.21 | 49.93 | 9.93 | 13.46 | 67.09 | 53.63 | -0.86 | 0.23 | 0.20 |
| SPI_Charisma | 2926 | 50.59 | 9.97 | 51.49 | 51.01 | 10.96 | 21.90 | 71.58 | 49.68 | -0.34 | -0.54 | 0.18 |
| SPI_Trust | 2952 | 50.44 | 9.94 | 50.96 | 50.57 | 10.52 | 27.80 | 73.20 | 45.40 | -0.10 | -0.61 | 0.18 |
| SPI_Humor | 2923 | 48.57 | 10.62 | 50.33 | 49.73 | 10.16 | 7.52 | 64.74 | 57.21 | -1.02 | 1.04 | 0.20 |
| SPI_EmotionalExpressiveness | 2923 | 49.18 | 9.84 | 48.23 | 48.76 | 11.20 | 32.70 | 71.81 | 39.11 | 0.31 | -0.88 | 0.18 |
| SPI_ArtAppreciation | 2921 | 46.96 | 11.47 | 50.20 | 48.80 | 7.78 | 15.08 | 72.72 | 57.65 | -1.35 | 1.37 | 0.21 |
| SPI_Introspection | 2951 | 49.92 | 10.21 | 52.19 | 51.08 | 10.56 | 15.16 | 62.58 | 47.43 | -0.88 | 0.19 | 0.19 |
| SPI_Perfectionism | 2952 | 48.37 | 10.18 | 48.80 | 48.70 | 10.96 | 18.72 | 69.55 | 50.82 | -0.27 | -0.47 | 0.19 |
| SPI_SelfControl | 2919 | 51.54 | 10.12 | 51.77 | 51.70 | 10.72 | 25.93 | 74.34 | 48.40 | -0.13 | -0.51 | 0.19 |
| SPI_Conformity | 2926 | 48.76 | 10.03 | 49.07 | 48.80 | 11.21 | 28.22 | 70.64 | 42.42 | -0.03 | -0.82 | 0.19 |
| SPI_Adaptability | 2921 | 51.24 | 9.60 | 51.22 | 51.44 | 10.68 | 28.77 | 70.29 | 41.52 | -0.13 | -0.75 | 0.18 |
| SPI_EasyGoingness | 2920 | 50.97 | 9.98 | 52.52 | 51.78 | 9.37 | 13.49 | 69.53 | 56.03 | -0.75 | 0.33 | 0.18 |
| SPI_EmotionalStability | 2952 | 54.01 | 8.69 | 56.24 | 55.20 | 6.98 | 28.98 | 67.27 | 38.29 | -1.22 | 1.17 | 0.16 |
| SPI_Conservatism | 2919 | 51.07 | 9.62 | 52.89 | 52.08 | 9.05 | 28.73 | 70.97 | 42.24 | -0.79 | -0.02 | 0.18 |
library(corrplot)
corrplot(R_female, method = "square",
title = "\nZero-order correlations among study variables\nFemale Participants",
tl.col = "black",
mar=c(0,0,1,0))
corrplot(R_male, method = "square",
title = "\nZero-order correlations among study variables\nMale Participants",
tl.col = "black",
mar=c(0,0,1,0))
Multinomial logistic regression models were built that regressed BMI category onto parental socio-economic status and adolescent individual differences. Two basic models were constructed: one that hypothesized parental SES:
\[BMI_i = b_0 + b_1(SES_i) + b_2(ID_i) + e_i\]
and an individual difference were two independent predictors of BMI, and a second that hypothesized these variables interacted with each other:
\[BMI_i = b_0 + b_1(SES_i) + b_2(ID_i) + b_3(SES_i\times ID_i) + e_i\] We iterated through all individual differences – the broad Big Five personality traits, the narrow SPI-27 traits, and cognitive ability – and tested each one independently in the model as an individual difference.
Models were estimated separately for men and women.
# end goal of wrangling is a data frame of data frames
# nested dataframes correspond to a single personality trait
# score refers to a participant's score on that trait
# we also standardize each of our variables within gender
sapa_male_trait = sapa_male %>%
dplyr::select(-starts_with("p1"), -starts_with("p2"), -starts_with("edu")) %>%
mutate(BMI_c = factor(BMI_c, levels = c("Normal Weight", "Underweight", "Overweight", "Obese"))) %>%
mutate(set = ifelse(row_number() %in% train_male[,1], "train", "test")) %>%
gather("trait_name", "trait_score", -ses, -BMI_c, -BMI, -BMI_p, -set) %>%
group_by(trait_name, set) %>%
mutate(trait_score = scale(trait_score)) %>%
ungroup() %>%
group_by(trait_name) %>%
nest()
sapa_female_trait = sapa_female %>%
dplyr::select(-starts_with("p1"), -starts_with("p2"), -starts_with("edu")) %>%
mutate(BMI_c = factor(BMI_c, levels = c("Normal Weight", "Underweight", "Overweight", "Obese"))) %>%
mutate(set = ifelse(row_number() %in% train_male[,1], "train", "test")) %>%
gather("trait_name", "trait_score", -ses, -BMI_c, -BMI, -BMI_p, -set) %>%
group_by(trait_name, set) %>%
mutate(trait_score = scale(trait_score)) %>%
ungroup() %>%
group_by(trait_name) %>%
nest()
To estimate the effect of socioeconomic status on BMI category, we graph the estimates of the SES slope coefficient across all logistic regression models controlling for individual differences. This presents not only the average estimate across all models (solid line), but the range of estimates – a wide range suggests that the effect of SES on BMI is sensitive to the inclusion of different individual difference measures, while a narrow range suggests that the effect of SES on BMI is persistent through personality and cognition.
Parental socioeconomic status positively predicted greater likelihood of all non-normal categories (Underweight, Overweight, and Obese) compared to Normal among girls. Adolescent girls living in higher SES households were, on average, 13% less likely to be Underweight, 31% less likely, and 45% less likely to be Obese compared to low SES counterparts.
Parental socioeconomic status positively predicted greater likelihood of all non-normal categories (Underweight, Overweight, and Obese) compared to Normal among boys. Adolescent boys living in higher SES households were, on average, 15% less likely to be Underweight, 21% less likely, and 35% less likely to be Obese compared to low SES counterparts.
To estimate the joint effect of socioeconomic status and individual differences on BMI category, we graph the estimates of the interaction terms of SES by individual differences by BMI category. Like before, we present the average effect (solid black line) and the 95% confidence intervals for each model.
Parental socioeconomic status positively predicted greater likelihood of all non-normal categories (Underweight, Overweight, and Obese) compared to Normal among girls. Adolescent girls living in higher SES households were, on average, -2% less likely to be Underweight, -5% less likely, and -3% less likely to be Obese compared to low SES counterparts.
Parental socioeconomic status positively predicted greater likelihood of all non-normal categories (Underweight, Overweight, and Obese) compared to Normal among boys. Adolescent boys living in higher SES households were, on average, 3% more likely to be Underweight, -3% more likely, and 17% more likely to be Obese compared to low SES counterparts.
female_ses_only = train(BMI_c ~ ses, data = sapa_female,
subset = train_female,
method = "multinom",
na.action = "na.exclude",
trControl = ctrl)
female_log = sapa_female_trait %>%
# train models on training subset; use mulintomial logistic regression; use specific formula
mutate(
cov = map(data, ~train(BMI_c ~ trait_score + ses, data = .,
subset = train_female,
method = "multinom",
na.action = "na.exclude",
trControl = ctrl)),
int = map(data, ~train(BMI_c ~ trait_score*ses, data = .,
subset = train_female,
method = "multinom",
na.action = "na.exclude",
trControl = ctrl))) %>%
gather("model", "output", cov, int) %>%
# create test data from all rows not used in training
mutate(test_data = map(data, .f = function(x) x[-train_female, ]),
#extract reference (true) BMI categories from test data
test_reference = map(test_data, "BMI_c"),
# predict categories from model output; na.pass puts NAs in any row with missing data
predicted = map2(output, test_data, predict, na.action = "na.pass"),
# calculate accuracy, sensitivity, specificity, etc
confusion = map2(predicted, test_reference, confusionMatrix),
# extract final model coefficients
final_mod = map(output, "finalModel"),
# tidy output for printing
coef = map(final_mod, broom::tidy, conf.int = TRUE))
male_ses_only = train(BMI_c ~ ses, data = sapa_male,
subset = train_male,
method = "multinom",
maxit= 1000,
na.action = "na.exclude",
trControl = ctrl)
accuracy = predict(male_ses_only, type="raw", newdata=sapa_male[-train_male, ])
postResample(sapa_male[-train_male, "BMI_c"], accuracy)
male_log = sapa_male_trait %>%
# train models on training subset; use mulintomial logistic regression; use specific formula
mutate(
cov = map(data, ~train(BMI_c ~ trait_score + ses, data = .,
subset = train_male,
method = "multinom",
na.action = "na.exclude",
trControl = ctrl)),
int = map(data, ~train(BMI_c ~ trait_score*ses, data = .,
subset = train_male,
method = "multinom",
na.action = "na.exclude",
trControl = ctrl))) %>%
gather("model", "output", cov, int) %>%
# create test data from all rows not used in training
mutate(test_data = map(data, .f = function(x) x[-train_male, ]),
#extract reference (true) BMI categories from test data
test_reference = map(test_data, "BMI_c"),
# predict categories from model output; na.pass puts NAs in any row with missing data
predicted = map2(output, test_data, predict, na.action = "na.pass"),
# calculate accuracy, sensitivity, specificity, etc
confusion = map2(predicted, test_reference, confusionMatrix),
# extract final model coefficients
final_mod = map(output, "finalModel"),
# tidy output for printing
coef = map(final_mod, broom::tidy, conf.int = TRUE))
| Trait | Obese | Overweight | Underweight |
|---|---|---|---|
| Cognitive Ability | 0.79 | 0.97 | 0.97 |
| [0.60, 1.04] | [0.76, 1.22] | [0.84, 1.12] | |
| SPI: 27 Factors | |||
| Compassion | 0.99 | 1.02 | 1.12 |
| [0.76, 1.29] | [0.81, 1.29] | [0.96, 1.31] | |
| Irritability | 1.18 | 1.16 | 1.08 |
| [0.90, 1.55] | [0.89, 1.51] | [0.93, 1.25] | |
| Sociability | 0.75* | 0.78* | 0.66* |
| [0.58, 0.98] | [0.62, 0.99] | [0.57, 0.76] | |
| Well Being | 0.52* | 0.53* | 0.62* |
| [0.38, 0.70] | [0.40, 0.71] | [0.53, 0.72] | |
| Sensation Seeking | 1.03 | 1.13 | 1.11 |
| [0.79, 1.35] | [0.89, 1.44] | [0.97, 1.28] | |
| Anxiety | 1.11 | 1.24 | 1.19* |
| [0.86, 1.44] | [0.96, 1.59] | [1.03, 1.37] | |
| Honesty | 0.72* | 0.79* | 0.94 |
| [0.56, 0.94] | [0.62, 0.99] | [0.80, 1.11] | |
| Industry | 0.71* | 0.88 | 0.76* |
| [0.54, 0.95] | [0.69, 1.13] | [0.65, 0.88] | |
| Intellect | 1.26 | 1.07 | 0.98 |
| [0.96, 1.67] | [0.84, 1.36] | [0.85, 1.13] | |
| Creativity | 0.94 | 0.99 | 1.07 |
| [0.72, 1.23] | [0.79, 1.24] | [0.92, 1.24] | |
| Impulsivity | 1.20 | 0.97 | 1.13 |
| [0.93, 1.55] | [0.76, 1.25] | [0.98, 1.31] | |
| Attention Seeking | 0.83 | 1.16 | 0.77* |
| [0.63, 1.08] | [0.89, 1.51] | [0.67, 0.89] | |
| Order | 0.84 | 0.96 | 0.89 |
| [0.62, 1.14] | [0.75, 1.23] | [0.76, 1.04] | |
| Authoritarianism | 1.06 | 1.02 | 0.87 |
| [0.82, 1.37] | [0.80, 1.29] | [0.75, 1.01] | |
| Charisma | 1.02 | 0.93 | 0.84* |
| [0.79, 1.31] | [0.73, 1.18] | [0.73, 0.97] | |
| Trust | 0.78 | 0.90 | 0.79* |
| [0.58, 1.05] | [0.72, 1.14] | [0.68, 0.92] | |
| Humor | 1.65* | 1.01 | 0.98 |
| [1.14, 2.38] | [0.81, 1.27] | [0.85, 1.13] | |
| Emotional Expressiveness | 0.70* | 0.73* | 0.76* |
| [0.53, 0.92] | [0.57, 0.92] | [0.66, 0.87] | |
| Art Appreciation | 1.31 | 1.02 | 1.72* |
| [0.91, 1.89] | [0.81, 1.29] | [1.41, 2.09] | |
| Introspection | 1.06 | 0.85 | 1.25* |
| [0.81, 1.38] | [0.67, 1.07] | [1.07, 1.46] | |
| Perfectionism | 0.88 | 0.88 | 0.80* |
| [0.66, 1.18] | [0.69, 1.11] | [0.69, 0.92] | |
| Self Control | 0.49* | 1.14 | 1.07 |
| [0.35, 0.69] | [0.89, 1.46] | [0.92, 1.24] | |
| Conformity | 1.00 | 1.15 | 0.84* |
| [0.77, 1.29] | [0.91, 1.45] | [0.73, 0.97] | |
| Adaptability | 0.83 | 1.10 | 0.90 |
| [0.63, 1.09] | [0.87, 1.40] | [0.77, 1.04] | |
| Easy Goingness | 1.60* | 1.69* | 1.26* |
| [1.17, 2.20] | [1.29, 2.21] | [1.08, 1.47] | |
| Emotional Stability | 0.80 | 0.89 | 0.75* |
| [0.63, 1.02] | [0.70, 1.12] | [0.65, 0.86] | |
| Conservatism | 0.81 | 1.00 | 0.86* |
| [0.61, 1.06] | [0.78, 1.28] | [0.74, 0.99] | |
| SPI: 5 Factors | |||
| Agreeableness | 0.81 | 1.03 | 0.91 |
| [0.64, 1.04] | [0.81, 1.32] | [0.79, 1.06] | |
| Conscientiousness | 0.83 | 0.98 | 0.75* |
| [0.64, 1.09] | [0.76, 1.26] | [0.64, 0.88] | |
| Extraversion | 0.70* | 0.89 | 0.75* |
| [0.54, 0.90] | [0.71, 1.13] | [0.65, 0.86] | |
| Neuroticism | 1.61* | 1.27 | 1.39* |
| [1.20, 2.17] | [0.96, 1.68] | [1.20, 1.62] | |
| Openness | 1.06 | 1.04 | 1.24* |
| [0.81, 1.40] | [0.82, 1.33] | [1.07, 1.44] | |
| Trait | Obese | Overweight | Underweight |
|---|---|---|---|
| Cognitive Ability | 0.87 | 0.74* | 0.90 |
| [0.68, 1.12] | [0.56, 0.98] | [0.78, 1.04] | |
| SPI: 27 Factors | |||
| Compassion | 0.96 | 0.95 | 1.04 |
| [0.77, 1.20] | [0.73, 1.24] | [0.90, 1.21] | |
| Irritability | 1.18 | 1.25 | 0.95 |
| [0.92, 1.53] | [0.95, 1.65] | [0.82, 1.11] | |
| Sociability | 0.87 | 0.83 | 0.78* |
| [0.68, 1.10] | [0.64, 1.08] | [0.67, 0.90] | |
| Well Being | 0.81 | 1.02 | 0.75* |
| [0.62, 1.05] | [0.79, 1.32] | [0.65, 0.87] | |
| Sensation Seeking | 0.83 | 1.25 | 0.74* |
| [0.63, 1.08] | [0.94, 1.65] | [0.63, 0.87] | |
| Anxiety | 0.85 | 0.82 | 1.09 |
| [0.65, 1.10] | [0.63, 1.07] | [0.94, 1.27] | |
| Honesty | 0.98 | 0.96 | 0.98 |
| [0.74, 1.28] | [0.74, 1.25] | [0.83, 1.15] | |
| Industry | 0.94 | 0.94 | 0.92 |
| [0.73, 1.23] | [0.71, 1.24] | [0.79, 1.07] | |
| Intellect | 1.06 | 1.24 | 0.92 |
| [0.79, 1.42] | [0.86, 1.80] | [0.78, 1.09] | |
| Creativity | 0.95 | 1.06 | 1.03 |
| [0.76, 1.19] | [0.79, 1.41] | [0.88, 1.19] | |
| Impulsivity | 1.12 | 1.11 | 1.04 |
| [0.88, 1.43] | [0.86, 1.43] | [0.89, 1.21] | |
| Attention Seeking | 0.78* | 0.87 | 0.73* |
| [0.62, 0.99] | [0.66, 1.14] | [0.63, 0.85] | |
| Order | 1.11 | 0.94 | 0.92 |
| [0.86, 1.42] | [0.72, 1.22] | [0.79, 1.06] | |
| Authoritarianism | 1.06 | 0.99 | 1.12 |
| [0.81, 1.40] | [0.74, 1.32] | [0.96, 1.32] | |
| Charisma | 0.87 | 1.28 | 0.79* |
| [0.67, 1.14] | [0.98, 1.68] | [0.68, 0.91] | |
| Trust | 1.13 | 0.90 | 1.16 |
| [0.87, 1.47] | [0.68, 1.20] | [0.99, 1.36] | |
| Humor | 1.41* | 0.91 | 0.84* |
| [1.06, 1.88] | [0.69, 1.19] | [0.72, 0.98] | |
| Emotional Expressiveness | 0.81 | 1.30 | 0.95 |
| [0.63, 1.05] | [1.00, 1.69] | [0.82, 1.11] | |
| Art Appreciation | 0.97 | 0.85 | 1.03 |
| [0.75, 1.25] | [0.67, 1.08] | [0.88, 1.21] | |
| Introspection | 1.03 | 0.87 | 0.94 |
| [0.78, 1.36] | [0.68, 1.12] | [0.80, 1.09] | |
| Perfectionism | 0.92 | 0.80 | 1.01 |
| [0.72, 1.17] | [0.61, 1.05] | [0.86, 1.19] | |
| Self Control | 0.81 | 0.83 | 1.00 |
| [0.62, 1.06] | [0.65, 1.07] | [0.85, 1.17] | |
| Conformity | 0.99 | 0.99 | 1.01 |
| [0.76, 1.28] | [0.78, 1.26] | [0.86, 1.17] | |
| Adaptability | 1.08 | 1.18 | 1.02 |
| [0.82, 1.42] | [0.89, 1.55] | [0.88, 1.19] | |
| Easy Goingness | 1.53* | 1.22 | 1.23* |
| [1.13, 2.07] | [0.94, 1.59] | [1.05, 1.43] | |
| Emotional Stability | 1.10 | 1.11 | 0.87* |
| [0.84, 1.43] | [0.85, 1.45] | [0.75, 1.00] | |
| Conservatism | 0.98 | 1.00 | 0.82* |
| [0.75, 1.27] | [0.77, 1.30] | [0.71, 0.96] | |
| SPI: 5 Factors | |||
| Agreeableness | 0.76* | 1.25 | 1.13 |
| [0.60, 0.97] | [0.92, 1.68] | [0.96, 1.33] | |
| Conscientiousness | 0.90 | 0.80 | 0.90 |
| [0.68, 1.19] | [0.60, 1.06] | [0.77, 1.06] | |
| Extraversion | 0.79 | 1.01 | 0.77* |
| [0.61, 1.03] | [0.76, 1.35] | [0.66, 0.89] | |
| Neuroticism | 1.17 | 1.42* | 1.22* |
| [0.93, 1.49] | [1.08, 1.86] | [1.06, 1.41] | |
| Openness | 1.22 | 0.92 | 0.92 |
| [0.94, 1.57] | [0.69, 1.23] | [0.79, 1.08] | |
Once we filter for adolescents living in the United States, approximately half our sample did not report either their height or weight. Given the sensitivity of body image, especially for the adolescent girls in our sample, we suspect these values are missing not at random (MNAR) and may impact the estimates here. To test for these effects, we imputed missing height and weight values using a principal components analysis approach, using only the other variables in the SAPA dataset that were not included in the analyses above. We repeated the logistic regression models with 10-fold cross validation, repeated 10 times, and report here the differences in significance across models and by term, as well as the estimates of the coefficients that were significant only in the complete data or in the imputed data.